suppressPackageStartupMessages({
library(here)
library(hyperSpec)
library(gplots)
library(ggplot2)
library(magrittr)
library(tidyverse)
})
#silence chunks
knitr::opts_chunk$set(message = FALSE, warning = FALSE)We use a combination of the Sequence Description and Gene Ontology (GO) terms to fetch cell-wall-related gene candidates
# Annotation
annotation <- read_tsv(here("data/b2g/blast2go_20190117_export.txt.gz"),show_col_types=FALSE) %>%
mutate(`Sequence Name`=sub("\\.p\\d+$","",`Sequence Name`))
# GO dictionary
go_annot <- annotation %>% select(`Annotation GO ID`,`Annotation GO Term`,`Annotation GO Category`) %>%
rename(ID=`Annotation GO ID`,Term=`Annotation GO Term`,Category=`Annotation GO Category`) %>%
filter(!is.na(ID))
go_annot <- tibble(ID=unlist(str_split(go_annot$ID,"\\|"),use.names=FALSE),
Term=unlist(str_split(go_annot$Term,"\\|"),use.names=FALSE),
Category=unlist(str_split(go_annot$Category,"\\|"),use.names=FALSE)) %>% distinct()
# Expression
expression <- read_tsv(here("data/analysis/salmon/variance-stabilised_model-aware_gene-expression_data.tsv"),
show_col_types=FALSE)
# Reverse sample swap
expression %<>% rename(B2_2_24hrs_D="B2_2_12hrs_D",B2_2_12hrs_D="B2_2_24hrs_D")
# Gene of interest
goi <- unlist(annotation[grepl("cell wall",annotation$`Sequence Description`) |
grepl("cell wall",annotation$`Annotation GO Term`),"Sequence Name"],use.names=FALSE)
#' replace (\\.p\\d+$) .p followed by any number of digits. The $ the sign that indicates the end of the text
goi <- sub("\\.p\\d+$","",goi)
#' sanity
stopifnot(length(goi)==length(unique(goi)))
message(sprintf("There are %s candidates",length(goi)))
#' * samples information
samples <-read_tsv(here("doc/Samples-swap-corrected.tsv"),show_col_types = FALSE)
#' ## Expression
vst <- expression %>% filter(rowname %in% goi) %>% column_to_rownames() %>% as.matrix()
sel <- rowSums(vst) > 0
vst <- vst[sel,]
samples <- samples[match(colnames(vst),samples$SampleID),]
# Export
# write_tsv(vst %>% as_tibble() %>% rownames_to_column("ID"),
# here("data/analysis/cell-wall/goi-vst-expression_with-geneID.tsv"))Next, we look at the correlation between cell wall thickness and change in expression (mean of 4 replicates). We keep only the time points that are present in both datasets:
1h, 4hs, 12hs,
24hs, 72hs, 120hs
#cell wall
cw <- read_tsv(here("doc/Cell-wall-measurement.tsv"))
cw %<>% filter(!time_in_hours %in% c(48,240))
cw_ctrl <- cw %>% filter(treatment %in% "control")
cw_cold <- cw %>% filter(treatment %in% "cold")
colnames(vst) %<>% gsub(pattern = "B2_2_", replacement = "")
colnames(vst) %<>% gsub(pattern = "60min", replacement = "1hrs_")
vst %<>% as.data.frame()
vst$gene <- rownames(vst)
#calculating the average expression of reps
vst_avg <- vst %>% pivot_longer(cols = -gene, names_to = c("time", "rep"), names_sep = "_") %>%
group_by(time, gene) %>%
summarize(meanExp = mean(value)) %>%
pivot_wider(names_from = time, values_from = meanExp)
#reorder the columns based on the cw data
vst_avg <- vst_avg[, c(1, match(paste0(cw_ctrl$time_in_hours, "hrs"), colnames(vst_avg)))]
#correlation
vst_avg %<>%
rowwise() %>%
mutate(ctrl_cor = cor(c_across(`1hrs`:`120hrs`),
cw_ctrl$width_in_um %>% as.vector(),
method = "pearson"),
cold_cor = cor(c_across(`1hrs`:`120hrs`),
cw_cold$width_in_um %>% as.vector(),
method = "pearson"))Below we look at the distribution of the correlation values for cell wall genes.
# Reshape data for ggplot
df <- vst_avg %>%
select(ctrl_cor,cold_cor) %>%
pivot_longer(cols = everything()) %>%
mutate(Color = ifelse(value >= 0.7, "red",
ifelse(value <= -0.7, "blue", "gray")))
# Create the plot
ggplot(data = df, aes(x = name, y = value)) +
geom_violin() +
geom_jitter(aes(color = Color), width = 0.2) +
scale_color_manual(values = c("blue", "grey", "red")) +
theme(legend.position = "none", text = element_text(size = 13)) +
labs(x = "", y = "Correlation values") +
geom_hline(yintercept = 0.7, linetype = "dashed", color = "red") +
geom_hline(yintercept = -0.7, linetype = "dashed", color = "blue")#frequencies
df %<>%
mutate(
range = factor(case_when(
(value >= -1 & value < -0.9) ~ "-1",
(value >= -0.9 & value < -0.8) ~ "-0.9",
(value >= -0.8 & value < -0.7) ~ "-0.8",
(value >= -0.7 & value < -0.6) ~ "-0.7",
(value >= -0.6 & value < -0.5) ~ "-0.6",
(value >= -0.5 & value < -0.4) ~ "-0.5",
(value >= -0.4 & value < -0.3) ~ "-0.4",
(value >= -0.3 & value < -0.2) ~ "-0.3",
(value >= -0.2 & value < -0.1) ~ "-0.2",
(value >= -0.1 & value < 0) ~ "-0.1",
(value >= 0 & value < 0.1) ~ "0.1",
(value >= 0.1 & value < 0.2) ~ "0.2",
(value >= 0.2 & value < 0.3) ~ "0.3",
(value >= 0.3 & value < 0.4) ~ "0.4",
(value >= 0.4 & value < 0.5) ~ "0.5",
(value >= 0.5 & value < 0.6) ~ "0.6",
(value >= 0.6 & value < 0.7) ~ "0.7",
(value >= 0.7 & value < 0.8) ~ "0.8",
(value >= 0.8 & value < 0.9) ~ "0.9",
(value >= 0.9 & value <= 1) ~ "1",
TRUE ~ NA_character_),
levels = c( "-1", "-0.9", "-0.8", "-0.7", "-0.6", "-0.5", "-0.4", "-0.3", "-0.2", "-0.1",
"0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9", "1"))) %>%
group_by(range, name) %>%
summarise(freq = n()) %>%
ungroup() %>%
mutate(barcol = case_when(name == "ctrl_cor" ~ "forestgreen",
name == "cold_cor" ~"deepskyblue"))
df %>%
ggplot(aes(y = ifelse(test = name == "cold_cor", yes = -freq, no = freq),
x = range, fill = name)) +
geom_col() +
scale_y_continuous() +
scale_fill_manual(values = unique(df$barcol)) +
labs(x = "Correlation", y = "Number of genes") +
theme(text = element_text(size = 12))Next, we identify the mfuzz clusters associated with our highly correlated genes.
#genes highly correlated with widths change in cold
goi_cold_up <- vst_avg %>% filter(cold_cor >= 0.7) %>% .$gene
goi_cold_dn <- vst_avg %>% filter(cold_cor <= -0.7) %>% .$gene
goi_ctrl_up <- vst_avg %>% filter(ctrl_cor >= 0.7) %>% .$gene
goi_ctrl_dn <- vst_avg %>% filter(ctrl_cor <= -0.7) %>% .$gene
#mfuzz clusters
clusts <- read_csv(here("data/mfuzz/clustermembership.csv"))
clusts <- clusts[,-1]
clusts %>% filter(NAME %in% goi_cold_up) %>% .$CLUSTER %>% table()## .
## 2 3 5 8
## 13 10 1 2
clusts %>% filter(NAME %in% goi_cold_dn) %>% .$CLUSTER %>% table()## .
## 1 4 7 8
## 2 2 1 1
clusts %>% filter(NAME %in% goi_ctrl_up) %>% .$CLUSTER %>% table()## .
## 2 3 8
## 8 3 2
clusts %>% filter(NAME %in% goi_ctrl_dn) %>% .$CLUSTER %>% table()## .
## 4
## 1
sessionInfo()## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.1
## [5] purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [9] tidyverse_2.0.0 magrittr_2.0.3 gplots_3.1.3 hyperSpec_0.100.0
## [13] xml2_1.3.4 ggplot2_3.4.2 lattice_0.20-45 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.10 deldir_1.0-6 png_0.1-8
## [4] gtools_3.9.4 rprojroot_2.0.3 digest_0.6.31
## [7] utf8_1.2.3 R6_2.5.1 evaluate_0.21
## [10] highr_0.10 pillar_1.9.0 rlang_1.1.0
## [13] lazyeval_0.2.2 rstudioapi_0.14 jquerylib_0.1.4
## [16] rmarkdown_2.21 labeling_0.4.2 bit_4.0.5
## [19] munsell_0.5.0 compiler_4.2.0 xfun_0.38
## [22] pkgconfig_2.0.3 htmltools_0.5.5 tidyselect_1.2.0
## [25] fansi_1.0.4 crayon_1.5.2 tzdb_0.3.0
## [28] withr_2.5.0 bitops_1.0-7 brio_1.1.3
## [31] jsonlite_1.8.4 gtable_0.3.3 lifecycle_1.0.3
## [34] DBI_1.1.3 scales_1.2.1 KernSmooth_2.23-20
## [37] cli_3.6.1 stringi_1.7.12 vroom_1.6.1
## [40] cachem_1.0.8 farver_2.1.1 testthat_3.1.7
## [43] latticeExtra_0.6-30 bslib_0.4.2 generics_0.1.3
## [46] vctrs_0.6.1 RColorBrewer_1.1-3 tools_4.2.0
## [49] interp_1.1-4 bit64_4.0.5 glue_1.6.2
## [52] hms_1.1.3 jpeg_0.1-10 parallel_4.2.0
## [55] fastmap_1.1.1 yaml_2.3.7 timechange_0.2.0
## [58] colorspace_2.1-0 caTools_1.18.2 knitr_1.42
## [61] sass_0.4.6
Created by Aman Zare